# Authors: Chris Piper and Mark Richardson
# Purpose: Estimate models of raw performance ratings and partisanship

# Load packages
library(dplyr)
library(ggplot2)
library(stringr)
library(fixest)

# Load data
load("/home/sfgs_team/performance_ratings/perf_ratings_pid.RData")

# Observe values of party variables
table(perf_stan$pid_3, perf_stan$pid_probe, useNA = "always")

# Recode to include leaners into parties;
# Do not include "Other Party" and "Neither" in Independent
# Code anyone who says Independent but then does not specify via probe
# if they say the probe Independent

perf_stan <- perf_stan %>%
  mutate(pid_complete = case_when(pid_3 == "Independent" & pid_probe == "Closer to the Democratic Party" ~ "Democrat",
                                  pid_3 == "Independent" & pid_probe == "Closer to the Republican Party"~ "Republican",
                                  pid_3 == "Other party (please specify):" & pid_probe == "Closer to the Democratic Party" ~ "Democrat",
                                  pid_3 == "Other party (please specify):" & pid_probe== "Closer to the Republican Party" ~ "Republican",
                                  pid_3 == "Don't know" & pid_probe == "Closer to the Democratic Party" ~ "Democrat",
                                  pid_3 == "Don't know" & pid_probe == "Closer to the Republican Party" ~ "Republican",
                                  pid_3 == "Democrat" ~ "Democrat",
                                  pid_3 == "Republican" ~ "Republican",
                                  pid_3 == "Independent"  & !is.na(pid_probe) ~ "Independent"))

perf_stan %>% filter(pid_3 == "Independent" & is.na(pid_probe))

table(perf_stan$pid_3, perf_stan$pid_complete, useNA = "always")

table(perf_stan$pid_probe, perf_stan$pid_complete, useNA = "always")

table(perf_stan$pid_complete, useNA = "always")

#### Models ####

# Check if partisans are more likely to give higher ratings
perf_stan$pid_complete <- relevel(as.factor(perf_stan$pid_complete), ref="Independent")

model_1 <- feols(perf_rating_org ~ pid_complete,
                 data = perf_stan,
                 vcov = ~ agency_rated)

summary(model_1)

model_2 <- feols(perf_rating_org ~ pid_complete | agency_rated,
                 data = perf_stan,
                 vcov = ~ agency_rated)

summary(model_2)


# Merging in Agency Ideology
table(perf_stan$agency_rated)

agency_workplace_ideologies <- readr::read_csv("/home/sfgs_team/performance_ratings/agency_workplace_ideologies.csv")

perf_stan_update <- left_join(perf_stan, agency_workplace_ideologies,
                              by="agency_rated")

model_3 <- feols(perf_rating_org ~ pid_complete * agency_rated_ideo_rating,
                 data = perf_stan_update,
                 vcov = ~ agency_rated)

summary(model_3)

#### Variance in ratings - R3 #####

rm(list = ls())

load("/home/shared/sfgs_3/data/ratings/performance_ratings/01_performance_ratings_for_model.RData")

# Reshape the set of full ratings
library(tidyr)

perf_1 <- perf_ratings %>%
  select(agency_rated_1, perf_rating_1) %>%
  rename(agency_rated = agency_rated_1,
         perf_rating = perf_rating_1)

perf_2 <- perf_ratings %>%
  select(agency_rated_2, perf_rating_2) %>%
  rename(agency_rated = agency_rated_2,
         perf_rating = perf_rating_2)

perf_3 <- perf_ratings %>%
  select(agency_rated_3, perf_rating_3) %>%
  rename(agency_rated = agency_rated_3,
         perf_rating = perf_rating_3)

perf_4 <- perf_ratings %>%
  select(agency_rated_4, perf_rating_4) %>%
  rename(agency_rated = agency_rated_4,
         perf_rating = perf_rating_4)

perf_5 <- perf_ratings %>%
  select(agency_rated_5, perf_rating_5) %>%
  rename(agency_rated = agency_rated_5,
         perf_rating = perf_rating_5)

perf_all <- bind_rows(perf_1,
                      perf_2,
                      perf_3,
                      perf_4,
                      perf_5) %>%
  drop_na()

rm(perf_1,
   perf_2,
   perf_3,
   perf_4,
   perf_5)

perf_all_ag <- perf_all %>%
  group_by(agency_rated) %>%
  summarize(perf_mean = mean(perf_rating),
            perf_var = var(perf_rating),
            n_all = n())

perf <- inner_join(perf_all_ag, perf_inf_priors %>% rename(n_inf = n),
                   by = "agency_rated")

perf <- perf %>%
  mutate(prop_inf = n_inf / n_all,
         dif_var = perf_var - perf_inf_var)

summary(perf$prop_inf)

summary(perf$dif_var)
IQR(perf$dif_var, na.rm = TRUE)

sum(perf$n_inf)/sum(perf$n_all)
